Code
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)df <- read.csv("../data/rawdata_participants.csv") |>
mutate(across(everything(), ~ifelse(.x == "", NA, .x))) |>
mutate(Experimenter = case_when(
Language=="English" & Experimenter == "reddit7" ~ "Reddit (other)",
Language=="English" & Experimenter == "reddit8" ~ "Reddit (other)",
Language=="English" & Experimenter == "reddit1" ~ "Reddit (other)",
.default = Experimenter
))
dftask <- read.csv("../data/rawdata_task.csv") |>
full_join(
df[c("Participant", "Sex", "SexualOrientation")],
by = join_by(Participant)
)The initial sample consisted of 409 participants (Mean age = 35.7, SD = 12.4, range: [18, 80]; Sex: 18.3% females, 80.4% males, 1.2% other; Education: Bachelor, 38.88%; Doctorate, 7.33%; High School, 23.47%; Master, 28.12%; Other, 1.47%; Primary School, 0.73%; Country: 28.61% USA, 13.45% France, 13.20% UK, 44.74% other).
# Create Sexual "relevance" variable (Relevant, irrelevant, non-erotic)
dftask <- dftask |>
mutate(Relevance = case_when(
Type == "Non-erotic" ~ "Non-erotic",
Sex == "Male" & SexualOrientation == "Heterosexual" & Category == "Female" ~ "Relevant",
Sex == "Female" & SexualOrientation == "Heterosexual" & Category == "Male" ~ "Relevant",
Sex == "Male" & SexualOrientation == "Homosexual" & Category == "Male" ~ "Relevant",
Sex == "Female" & SexualOrientation == "Homosexual" & Category == "Female" ~ "Relevant",
# TODO: what to do with "Other"?
SexualOrientation %in% c("Bisexual", "Other") & Category %in% c("Male", "Female") ~ "Relevant",
.default = "Irrelevant"
)) # Consecutive count of participants per day (as area)
df |>
mutate(Date = as.Date(Date, format = "%d/%m/%Y")) |>
group_by(Date, Language, Experimenter) |>
summarize(N = n()) |>
ungroup() |>
# https://bocoup.com/blog/padding-time-series-with-r
complete(Date, Language, Experimenter, fill = list(N = 0)) |>
group_by(Language, Experimenter) |>
mutate(N = cumsum(N)) |>
ggplot(aes(x = Date, y = N)) +
geom_area(aes(fill=Experimenter)) +
scale_y_continuous(expand = c(0, 0)) +
labs(
title = "Recruitment History",
x = "Date",
y = "Total Number of Participants"
) +
facet_wrap(~Language, nrow=3) +
see::theme_modern() # Table
summarize(df, N = n(), .by=c("Language", "Experimenter")) |>
arrange(desc(N)) |>
gt::gt() |>
gt::opt_stylize() |>
gt::opt_interactive(use_compact_mode = TRUE) |>
gt::tab_header("Number of participants per recruitment source")The majority of participants found the study to be a “fun” experience. Interstingly, reports of “fun” were significantly associated with finding at least some stimuli arousing. Conversely, reporting “no feelings” was associated with finding the experiment “boring”.
df |>
select(starts_with("Feedback"), -Feedback_Comments) |>
pivot_longer(everything(), names_to = "Question", values_to = "Answer") |>
group_by(Question, Answer) |>
summarise(prop = n()/nrow(df), .groups = 'drop') |>
complete(Question, Answer, fill = list(prop = 0)) |>
filter(Answer == "True") |>
mutate(Question = str_remove(Question, "Feedback_"),
Question = str_replace(Question, "AILessArousing", "AI = Less arousing"),
Question = str_replace(Question, "AIMoreArousing", "AI = More arousing"),
Question = str_replace(Question, "CouldNotDiscriminate", "Hard to discriminate"),
Question = str_replace(Question, "LabelsIncorrect", "Labels were incorrect"),
Question = str_replace(Question, "NoFeels", "Didn't feel anything"),
Question = str_replace(Question, "CouldDiscriminate", "Easy to discriminate"),
Question = str_replace(Question, "LabelsReversed", "Labels were reversed")) |>
mutate(Question = fct_reorder(Question, desc(prop))) |>
ggplot(aes(x = Question, y = prop)) +
geom_bar(stat = "identity") +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks(), labels=scales::percent) +
labs(x="Feedback", y = "Participants", title = "Feedback") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
axis.title.x = element_blank()
)cor <- df |>
select(starts_with("Feedback"), -Feedback_Comments) |>
mutate_all(~ifelse(.=="True", 1, 0)) |>
correlation(method="tetrachoric", redundant = TRUE) |>
correlation::cor_sort() |>
correlation::cor_lower()For i = 2 j = 1 A cell entry of 0 was replaced with correct = 0.5. Check your data!
For i = 2 j = 1 A cell entry of 0 was replaced with correct = 0.5. Check your data!
cor |>
mutate(val = paste0(insight::format_value(rho), format_p(p, stars_only=TRUE))) |>
mutate(Parameter2 = fct_rev(Parameter2)) |>
mutate(Parameter1 = fct_relabel(Parameter1, \(x) str_remove_all(x, "Feedback_")),
Parameter2 = fct_relabel(Parameter2, \(x) str_remove_all(x, "Feedback_"))) |>
ggplot(aes(x=Parameter1, y=Parameter2)) +
geom_tile(aes(fill = rho), color = "white") +
geom_text(aes(label = val), size = 3) +
labs(title = "Feedback Co-occurence Matrix") +
scale_fill_gradient2(
low = "#2196F3",
mid = "white",
high = "#F44336",
breaks = c(-1, 0, 1),
guide = guide_colourbar(ticks=FALSE),
midpoint = 0,
na.value = "grey85",
limit = c(-1, 1)) +
theme_minimal() +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))outliers <- c(
# "S206" # Collapsed RTs in both phases
# "S399" # Negative Arousal-Valence correlations
)
potentials <- list()df |>
ggplot(aes(x=Mobile, fill=Mobile)) +
geom_bar() +
geom_hline(yintercept=0.5*nrow(df), linetype="dashed") +
theme_modern()We removed 145 participants that participated with a mobile device.
df <- filter(df, Mobile == "False")
dftask <- filter(dftask, Participant %in% df$Participant)The experiment’s median duration is 23.90 min (50% CI [18.39, 24.96]).
df |>
mutate(Participant = fct_reorder(Participant, Experiment_Duration),
Category = ifelse(Experiment_Duration > 60, "extra", "ok"),
Duration = ifelse(Experiment_Duration > 60, 60, Experiment_Duration),
Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(y = Participant, x = Duration)) +
geom_point(aes(color = Group, shape = Category)) +
geom_vline(xintercept = median(df$Experiment_Duration), color = "red", linetype = "dashed") +
scale_shape_manual(values = c("extra" = 3, ok = 19)) +
scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
guides(color = "none", shape = "none") +
ggside::geom_xsidedensity(fill = "#4CAF50", color=NA) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
labs(
title = "Experiment Completion Time",
x = "Duration (in minutes)",
y = "Participant"
) +
theme_bw() +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .3,
panel.border = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank())potentials$expe_duration <- arrange(df, Experiment_Duration) |>
select(Participant, Experiment_Duration) |>
head(5) plot_hist <- function(dat) {
dens <- rbind(
mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT1),
Phase="Emotional ratings",
y = y / max(y)),
mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT2),
Phase="Reality rating",
y = y / max(y))
)
dat |>
filter(RT1 < 40 & RT2 < 40) |> # Remove very long RTs
# mutate(Participant = fct_reorder(Participant, RT1)) |>
pivot_longer(cols = c(RT1, RT2), names_to = "Phase", values_to = "RT") |>
mutate(Phase = ifelse(Phase == "RT1", "Emotional ratings", "Reality rating")) |>
ggplot(aes(x=RT)) +
geom_area(data=dens, aes(x=x, y=y, fill=Phase), alpha=0.33, position="identity") +
geom_density(aes(color=Phase, y=after_stat(scaled)), linewidth=1.5) +
scale_x_sqrt(breaks=c(0, 2, 5, 10, 20)) +
theme_minimal() +
theme(axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.line.y = element_blank()) +
labs(title = "Distribution of Response Time for each Participant", x="Response time per stimuli (s)") +
facet_wrap(~Participant, ncol=5, scales="free_y") +
coord_cartesian(xlim = c(0, 25))
}plot_hist(dftask[dftask$Participant %in% df$Participant[1:60], ])plot_hist(dftask[dftask$Participant %in% df$Participant[61:120], ])plot_hist(dftask[dftask$Participant %in% df$Participant[121:180], ])plot_hist(dftask[dftask$Participant %in% df$Participant[181:240], ])plot_hist(dftask[dftask$Participant %in% df$Participant[241:264], ])df |>
mutate(Participant = fct_reorder(Participant, BAIT_Duration),
Category = ifelse(BAIT_Duration > 5, "extra", "ok"),
Duration = ifelse(BAIT_Duration > 5, 5, BAIT_Duration),
Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(y = Participant, x = Duration)) +
geom_point(aes(color = Group, shape = Category)) +
geom_vline(xintercept = median(df$BAIT_Duration), color = "red", linetype = "dashed") +
scale_shape_manual(values = c("extra" = 3, ok = 19)) +
scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
guides(color = "none", shape = "none") +
ggside::geom_xsidedensity(fill = "#9C27B0", color=NA) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
labs(
title = "Questionnaire Completion Time",
x = "Duration (in minutes)",
y = "Participant"
) +
theme_bw() +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .3,
panel.border = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank()) dat <- dftask |>
filter(Relevance %in% c("Relevant", "Non-erotic")) |>
group_by(Participant, Type) |>
summarise(Arousal = mean(Arousal),
Valence = mean(Valence),
Enticement = mean(Enticement),
.groups = "drop") |>
pivot_wider(names_from = Type, values_from = all_of(c("Arousal", "Valence", "Enticement"))) |>
transmute(Participant = Participant,
Arousal = Arousal_Erotic - `Arousal_Non-erotic`,
Valence = Valence_Erotic - `Valence_Non-erotic`,
Enticement = Enticement_Erotic - `Enticement_Non-erotic`) |>
filter(!is.na(Arousal)) |>
mutate(Participant = fct_reorder(Participant, Arousal))
dat |>
pivot_longer(-Participant) |>
mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(x=value, y=Participant, fill=Group)) +
geom_bar(aes(fill=value), stat = "identity") +
scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
# scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
facet_wrap(~name, ncol=3, scales="free_x")potentials$emo_diff <- arrange(dat, Arousal) |>
head(5)dat <- dftask |>
summarize(cor_ArVal = cor(Arousal, Valence),
cor_ArEnt = cor(Arousal, Enticement),
.by="Participant") |>
mutate(Participant = fct_reorder(Participant, cor_ArVal))
dat |>
pivot_longer(-Participant) |>
mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
mutate(name = fct_relevel(name, "cor_ArVal", "cor_ArEnt"),
name = fct_recode(name, "Arousal - Valence" = "cor_ArVal", "Arousal - Enticement" = "cor_ArEnt")) |>
ggplot(aes(y = Participant, x = value, fill = Group)) +
geom_bar(stat = "identity") +
# scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
facet_wrap(~name, ncol=3, scales="free_x")potentials$emo_cor <- arrange(dat, cor_ArVal) |>
head(5)c(as.character(potentials$expe_duration$Participant),
as.character(potentials$emo_diff$Participant),
as.character(potentials$emo_cor$Participant)) |>
table()
S019 S137 S168 S186 S259 S303 S305 S338 S341 S381 S391 S399 S404
1 1 1 1 1 1 1 1 2 1 1 2 1
df |>
ggplot(aes(x = Sex)) +
geom_bar(aes(fill = SexualOrientation)) +
scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
scale_fill_metro_d() +
labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Sexual Orientation", fill = "Sexual Orientation") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)We removed 10 participants that were incompatible with further analysis.
df <- filter(df, Sex != "Other" & SexualOrientation != "Other")
dftask <- filter(dftask, Participant %in% df$Participant)show_distribution <- function(dftask, target="Arousal") {
dftask |>
filter(SexualOrientation %in% c("Heterosexual", "Bisexual", "Homosexual")) |>
bayestestR::estimate_density(select=target,
at=c("Relevance", "Category", "Sex", "SexualOrientation"),
method="KernSmooth") |>
ggplot(aes(x = x, y = y)) +
geom_line(aes(color = Relevance, linetype = Category), linewidth=1) +
facet_grid(SexualOrientation~Sex, scales="free_y") +
scale_color_manual(values = c("Relevant" = "red", "Non-erotic" = "blue", "Irrelevant"="darkorange")) +
scale_y_continuous(expand = c(0, 0)) +
scale_x_continuous(expand = c(0, 0)) +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(face="bold")) +
labs(title = target)
}
(show_distribution(dftask, "Arousal") | show_distribution(dftask, "Valence")) /
(show_distribution(dftask, "Enticement") | show_distribution(dftask, "Realness")) +
patchwork::plot_layout(guides = "collect") +
patchwork::plot_annotation(title = "Distribution of Appraisals depending on the Sexual Profile",
theme = theme(plot.title = element_text(hjust = 0.5, face="bold"))) We kept only heterosexual participants (79.92%).
df <- filter(df, SexualOrientation == "Heterosexual")
dftask <- filter(dftask, Participant %in% df$Participant)df <- filter(df, !Participant %in% outliers)
dftask <- filter(dftask, Participant %in% df$Participant)The final sample includes 203 participants (Mean age = 37.9, SD = 13.4, range: [18, 80]; Sex: 15.8% females, 84.2% males, 0.0% other; Education: Bachelor, 34.98%; Doctorate, 8.87%; High School, 19.21%; Master, 34.98%; Other, 1.48%; Primary School, 0.49%; Country: 27.09% USA, 16.26% France, 11.82% UK, 44.83% other).
p_country <- dplyr::select(df, region = Country) |>
group_by(region) |>
summarize(n = n()) |>
right_join(map_data("world"), by = "region") |>
ggplot(aes(long, lat, group = group)) +
geom_polygon(aes(fill = n)) +
scale_fill_gradientn(colors = c("#FFEB3B", "red", "purple")) +
labs(fill = "N") +
theme_void() +
labs(title = "A Geographically Diverse Sample", subtitle = "Number of participants by country") +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2))
)
p_countryggwaffle::waffle_iron(df, ggwaffle::aes_d(group = Ethnicity), rows=10) |>
ggplot(aes(x, y, fill = group)) +
ggwaffle::geom_waffle() +
coord_equal() +
scale_fill_flat_d() +
ggwaffle::theme_waffle() +
labs(title = "Self-reported Ethnicity", subtitle = "Each square represents a participant", fill="") +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2)),
axis.title.x = element_blank(),
axis.title.y = element_blank()
)Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
p_age <- estimate_density(df$Age) |>
normalize(select = y) |>
mutate(y = y * 86) |> # To match the binwidth
ggplot(aes(x = x)) +
geom_histogram(data=df, aes(x = Age), fill = "#616161", bins=28) +
# geom_line(aes(y = y), color = "orange", linewidth=2) +
geom_vline(xintercept = mean(df$Age), color = "red", linewidth=1.5) +
# geom_label(data = data.frame(x = mean(df$Age) * 1.15, y = 0.95 * 75), aes(y = y), color = "red", label = paste0("Mean = ", format_value(mean(df$Age)))) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = "Age", y = "Number of Participants", color = NULL, subtitle = "Distribution of participants' age") +
theme_modern(axis.title.space = 10) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_agep_edu <- df |>
mutate(Education = fct_relevel(Education, "Other", "Primary School", "High School", "Bachelor", "Master", "Doctorate")) |>
ggplot(aes(x = Education)) +
geom_bar(aes(fill = Education)) +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
scale_fill_viridis_d(guide = "none") +
labs(title = "Education", y = "Number of Participants", subtitle = "Participants per achieved education level") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_educolors <- c(
"NA" = "#2196F3", "None" = "#E91E63", "Condoms (for partner)" = "#9C27B0",
"Combined pills" = "#FF9800", "Intrauterine Device (IUD)" = "#FF5722",
"Intrauterine System (IUS)" = "#795548", "Progestogen pills" = "#4CAF50",
"Other" = "#FFC107", "Condoms (female)" = "#607D8B"
)
colors <- colors[names(colors) %in% c("NA", df$BirthControl)]
p_sex <- df |>
mutate(BirthControl = ifelse(Sex == "Male", "NA", BirthControl),
BirthControl = fct_relevel(BirthControl, names(colors))) |>
ggplot(aes(x = Sex)) +
geom_bar(aes(fill = BirthControl)) +
scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
scale_fill_manual(
values = colors,
breaks = names(colors)[2:length(colors)]
) +
labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Birth Control Method", fill = "Birth Control") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_sexp_sexprofile <- df |>
select(Participant, Sex, SexualOrientation, SexualActivity, COPS_Duration_1, COPS_Frequency_2) |>
pivot_longer(-all_of(c("Participant", "Sex"))) |>
mutate(name = str_replace_all(name, "SexualOrientation", "Sexual Orientation"),
name = str_replace_all(name, "SexualActivity", "Sexual Activity"),
name = str_replace_all(name, "COPS_Duration_1", "Pornography Usage (Duration)"),
name = str_replace_all(name, "COPS_Frequency_2", "Pornography Usage (Frequency)")) |>
ggplot(aes(x = value, fill=Sex)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
scale_fill_manual(values = c("Male"= "#64B5F6", "Female"= "#F06292")) +
labs(title = "Sexual Profile of Participants") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
axis.title.x = element_blank(),
axis.title.y = element_blank()
) +
facet_wrap(~name, scales = "free")
p_sexprofilep_language <- df |>
ggplot(aes(x = Language_Level)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = "Level", y = "Number of Participants", title = "Language Level") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)
p_expertise <- df |>
ggplot(aes(x = AI_Knowledge)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = "Level", y = "Number of Participants", title = "AI-Expertise") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)
p_language | p_expertisep_country /
(p_age + p_edu)This section pertains to the validation of the BAIT scale measuring beliefs and expectations about artificial creations.
bait <- df |>
select(starts_with("BAIT_"), -BAIT_Duration) |>
rename_with(function(x) gsub("BAIT_\\d_", "", x))
cor <- correlation::correlation(bait, redundant = TRUE) |>
correlation::cor_sort() |>
correlation::cor_lower()
clean_labels <- function(x) {
x <- str_remove_all(x, "BAIT_") |>
str_replace_all("_", " - ")
}
cor |>
mutate(val = paste0(insight::format_value(r), format_p(p, stars_only=TRUE))) |>
mutate(Parameter2 = fct_rev(Parameter2)) |>
mutate(Parameter1 = fct_relabel(Parameter1, clean_labels),
Parameter2 = fct_relabel(Parameter2, clean_labels)) |>
ggplot(aes(x=Parameter1, y=Parameter2)) +
geom_tile(aes(fill = r), color = "white") +
geom_text(aes(label = val), size = 3) +
labs(title = "Correlation Matrix",
subtitle = "Beliefs about Artificial Information Technology (BAIT)") +
scale_fill_gradient2(
low = "#2196F3",
mid = "white",
high = "#F44336",
breaks = c(-1, 0, 1),
guide = guide_colourbar(ticks=FALSE),
midpoint = 0,
na.value = "grey85",
limit = c(-1, 1)) +
theme_minimal() +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))n <- parameters::n_factors(bait, package = "nFactors")
plot(n)efa <- parameters::factor_analysis(bait, cor=cor(bait), n=2, rotation = "oblimin", sort=TRUE, scores="tenBerge", fm="ml")
plot(efa)display(efa)| Variable | ML2 | ML1 | Complexity | Uniqueness |
|---|---|---|---|---|
| TextRealistic | 0.82 | -0.06 | 1.01 | 0.35 |
| TextIssues | -0.67 | 0.03 | 1.00 | 0.55 |
| ImitatingReality | 0.46 | 0.09 | 1.08 | 0.76 |
| ImagesRealistic | 0.42 | 0.21 | 1.48 | 0.74 |
| EnvironmentReal | 0.40 | 0.31 | 1.89 | 0.69 |
| ImagesIssues | -0.31 | -0.02 | 1.01 | 0.90 |
| VideosIssues | -0.03 | 0.88 | 1.00 | 0.23 |
| VideosRealistic | -0.16 | -0.41 | 1.32 | 0.78 |
The 2 latent factors (oblimin rotation) accounted for 37.34% of the total variance of the original data (ML2 = 23.05%, ML1 = 14.28%).
m1 <- lavaan::cfa(efa_to_cfa(efa, threshold="max"), data=bait)
m2 <- lavaan::cfa(
"G =~ ImitatingReality + EnvironmentReal + VideosIssues + TextIssues + VideosRealistic + ImagesRealistic + ImagesIssues + TextRealistic",
data=bait)
# bayestestR::bayesfactor_models(m1, m2)
lavaan::anova(m1, m2)
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
m1 19 -83.878 -27.5534 76.688
m2 20 -60.656 -7.6446 101.910 25.222 0.34543 1 5.109e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
display(parameters::parameters(m1))| Link | Coefficient | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| ML2 =~ ImagesRealistic | 1.00 | 0.00 | (1.00, 1.00) | < .001 | |
| ML2 =~ TextRealistic | 2.17 | 0.36 | (1.47, 2.87) | 6.05 | < .001 |
| ML2 =~ ImagesIssues | -0.88 | 0.23 | (-1.33, -0.43) | -3.80 | < .001 |
| ML2 =~ TextIssues | -1.80 | 0.31 | (-2.42, -1.19) | -5.75 | < .001 |
| ML2 =~ ImitatingReality | 1.58 | 0.31 | (0.97, 2.18) | 5.13 | < .001 |
| ML2 =~ EnvironmentReal | 1.49 | 0.29 | (0.92, 2.07) | 5.10 | < .001 |
| ML1 =~ VideosRealistic | 1.00 | 0.00 | (1.00, 1.00) | < .001 | |
| ML1 =~ VideosIssues | -0.79 | 0.29 | (-1.36, -0.22) | -2.74 | 0.006 |
| Link | Coefficient | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| ML2 ~~ ML1 | -6.89e-03 | 2.25e-03 | (-0.01, -2.48e-03) | -3.06 | 0.002 |
Exploratory Graph Analysis (EGA) is a recently developed framework for psychometric assessment, that can be used to estimate the number of dimensions in questionnaire data using network estimation methods and community detection algorithms, and assess the stability of dimensions and items using bootstrapping.
Unique Variable Analysis (Christensen, Garrido, & Golino, 2023) uses the weighted topological overlap measure (Nowick et al., 2009) on an estimated network. Values greater than 0.25 are determined to have considerable local dependence (i.e., redundancy) that should be handled (variables with the highest maximum weighted topological overlap to all other variables (other than the one it is redundant with) should be removed).
uva <- EGAnet::UVA(data = bait, cut.off = 0.3)
uvaVariable pairs with wTO > 0.30 (large-to-very large redundancy)
node_i node_j wto
TextRealistic TextIssues 0.409
----
Variable pairs with wTO > 0.25 (moderate-to-large redundancy)
----
Variable pairs with wTO > 0.20 (small-to-moderate redundancy)
node_i node_j wto
VideosRealistic VideosIssues 0.244
uva$keep_remove$keep
[1] "TextIssues"
$remove
[1] "TextRealistic"
ega <- list()
for(model in c("glasso", "TMFG")) {
for(algo in c("walktrap", "louvain")) {
for(type in c("ega", "ega.fit")) { # "hierega"
if(type=="ega.fit" & algo=="louvain") next # Too slow
ega[[paste0(model, "_", algo, "_", type)]] <- EGAnet::bootEGA(
data = bait,
seed=123,
model=model,
algorithm=algo,
EGA.type=type,
type="resampling",
plot.itemStability=FALSE,
verbose=FALSE)
}
}
}
EGAnet::compare.EGA.plots(
ega$glasso_walktrap_ega, ega$glasso_walktrap_ega.fit,
ega$glasso_louvain_ega, ega$TMFG_louvain_ega,
ega$TMFG_walktrap_ega, ega$TMFG_walktrap_ega.fit,
labels=c("glasso_walktrap_ega", "glasso_walktrap_ega.fit",
"glasso_louvain_ega", "TMFG_louvain_ega",
"TMFG_walktrap_ega", "TMFG_walktrap_ega.fit"),
rows=3,
plot.all = FALSE)$allFigures shows how often each variable is replicating in their empirical structure across bootstraps.
patchwork::wrap_plots(lapply(ega, plot), nrow = 3)ega <- ega$TMFG_walktrap_ega$EGA
plot(ega)ega_scores <- EGAnet::net.scores(bait, ega)$scores$std.scores |>
as.data.frame() |>
setNames(c("EGA1", "EGA2")) # Merge with data
scores <- lavaan::predict(m1) |>
as.data.frame() |>
data_rename(c("ML1", "ML2"), c("BAIT_SEM1", "BAIT_SEM2")) |>
cbind(ega_scores) |>
mutate(Participant = df$Participant) |>
mutate(BAIT = rowMeans(select(bait, -contains("Videos")), na.rm = TRUE))
df <- full_join(df, scores, by="Participant")We computed two type of general scores for the BAIT scale, an empirical score based on the average of observed data (of the most loading items) and a model-based score as predicted by the structural model. The first one gives equal weight to all items (and keeps the same [0-1] range), while the second one is based on the factor loadings and the covariance structure.
correlation::cor_test(scores, "BAIT_SEM2", "BAIT") |>
plot() +
ggside::geom_xsidedensity(aes(x=BAIT_SEM2), color="grey", linewidth=1) +
ggside::geom_ysidedensity(aes(y=BAIT), color="grey", linewidth=1) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
ggside::scale_ysidex_continuous(expand = c(0, 0)) +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .1) While the two correlate substantially, they have different benefits. The empirical score has a more straightforward meaning and is more reproducible (as it is not based on a model fitted on a specific sample), the model-based score takes into account the relative importance of the contribution of each item to their factor.
table <- correlation::correlation(scores) |>
summary()
format(table) |>
datawizard::data_rename("Parameter", "Variables") |>
gt::gt() |>
gt::cols_align(align="center") |>
gt::tab_options(column_labels.font.weight="bold")| Variables | BAIT | EGA2 | EGA1 | BAIT_SEM1 |
|---|---|---|---|---|
| BAIT_SEM2 | 0.59*** | -0.06 | 0.98*** | -0.52*** |
| BAIT_SEM1 | -0.29*** | 0.16 | -0.47*** | |
| EGA1 | 0.63*** | -0.03 | ||
| EGA2 | 0.12 |
table <- correlation::correlation(scores,
select(df, starts_with("GAAIS")),
bayesian=TRUE) |>
summary()Warning in genhypergeo_series_pos(U = c((n - 1)/2, (n - 1)/2), L = ((n + :
Series not converged.
format(table) |>
datawizard::data_rename("Parameter", "Variables") |>
gt::gt() |>
gt::cols_align(align="center") |>
gt::tab_options(column_labels.font.weight="bold")| Variables | GAAIS_Negative_10 | GAAIS_Positive_17 | GAAIS_Positive_7 | GAAIS_Negative_15 | GAAIS_Negative_9 | GAAIS_Positive_12 |
|---|---|---|---|---|---|---|
| BAIT_SEM2 | -0.15* | 0.25*** | 0.30*** | -0.11 | 0.14* | 0.23*** |
| BAIT_SEM1 | -0.04 | -0.03 | 0.05 | -0.16** | -0.10 | 0.06 |
| EGA1 | -0.15* | 0.25*** | 0.31*** | -0.11 | 0.13* | 0.25*** |
| EGA2 | 0.07 | -0.08 | -0.02 | 0.04 | 0.01 | -0.03 |
| BAIT | 6.13e-03 | 0.25*** | 0.14* | 0.02 | 0.15* | 0.12 |
df |>
ggplot(aes(x=as.factor(AI_Knowledge), y=BAIT)) +
geom_boxplot()# m <- betareg::betareg(BAIT ~ AI_Knowledge, data=df)
m <- lm(BAIT ~ AI_Knowledge, data=df)
# m <- brms::brm(BAIT ~ mo(AI_Knowledge), data=df, algorithm = "meanfield")
# m <- brms::brm(BAIT ~ AI_Knowledge, data=dfsub, algorithm = "meanfield")
display(parameters::parameters(m))| Parameter | Coefficient | SE | 95% CI | t(201) | p |
|---|---|---|---|---|---|
| (Intercept) | 0.53 | 0.02 | (0.48, 0.58) | 21.94 | < .001 |
| AI Knowledge | 0.01 | 6.26e-03 | (2.59e-03, 0.03) | 2.39 | 0.018 |
marginaleffects::predictions(m, by=c("AI_Knowledge"), newdata = "marginalmeans") |>
as.data.frame() |>
ggplot(aes(x=AI_Knowledge, y=estimate)) +
geom_jitter2(data=df, aes(y=BAIT), alpha=0.2, width=0.1) +
geom_line(aes(group=1), position = position_dodge(width=0.2)) +
geom_pointrange(aes(ymin = conf.low, ymax=conf.high), position = position_dodge(width=0.2)) +
theme_minimal() +
labs(x = "AI-Knowledge", y="BAIT Score")# m <- betareg::betareg(BAIT ~ Sex / Age, data=df, na.action=na.omit)
m <- lm(BAIT ~ Sex / Age, data=df)
display(parameters::parameters(m))| Parameter | Coefficient | SE | 95% CI | t(199) | p |
|---|---|---|---|---|---|
| (Intercept) | 0.62 | 0.05 | (0.53, 0.71) | 13.13 | < .001 |
| Sex (Male) | -0.03 | 0.05 | (-0.14, 0.07) | -0.63 | 0.531 |
| Sex (Female) × Age | 1.27e-04 | 1.36e-03 | (-2.55e-03, 2.80e-03) | 0.09 | 0.925 |
| Sex (Male) × Age | -2.43e-04 | 6.13e-04 | (-1.45e-03, 9.67e-04) | -0.40 | 0.693 |
glm(Feedback_LabelsIncorrect ~ BAIT * AI_Knowledge,
data=mutate(df, Feedback_LabelsIncorrect = ifelse(Feedback_LabelsIncorrect=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Labels are Incorrect'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -1.01 | 2.84 | (-6.67, 4.55) | -0.35 | 0.723 |
| BAIT | 0.76 | 4.92 | (-8.92, 10.53) | 0.15 | 0.877 |
| AI Knowledge | 0.30 | 0.74 | (-1.15, 1.78) | 0.40 | 0.685 |
| BAIT × AI Knowledge | -0.33 | 1.27 | (-2.85, 2.16) | -0.26 | 0.796 |
glm(Feedback_LabelsReversed ~ BAIT * AI_Knowledge,
data=mutate(df, Feedback_LabelsReversed = ifelse(Feedback_LabelsReversed=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Labels are reversed'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -11.91 | 6.13 | (-24.51, -0.64) | -1.94 | 0.052 |
| BAIT | 16.34 | 10.07 | (-2.61, 36.70) | 1.62 | 0.105 |
| AI Knowledge | 2.11 | 1.58 | (-0.86, 5.27) | 1.34 | 0.181 |
| BAIT × AI Knowledge | -3.70 | 2.62 | (-8.96, 1.23) | -1.41 | 0.159 |
glm(Feedback_CouldDiscriminate ~ BAIT * AI_Knowledge,
data=mutate(df, Feedback_CouldDiscriminate = ifelse(Feedback_CouldDiscriminate=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Easy to discriminate'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -4.25 | 7.69 | (-19.91, 9.87) | -0.55 | 0.581 |
| BAIT | 1.92 | 13.94 | (-24.79, 29.12) | 0.14 | 0.891 |
| AI Knowledge | 0.91 | 2.07 | (-2.90, 5.04) | 0.44 | 0.660 |
| BAIT × AI Knowledge | -1.70 | 3.75 | (-9.13, 5.18) | -0.45 | 0.651 |
glm(Feedback_CouldNotDiscriminate ~ BAIT * AI_Knowledge,
data=mutate(df, Feedback_CouldNotDiscriminate = ifelse(Feedback_CouldNotDiscriminate=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Hard to discriminate'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -3.07 | 2.97 | (-9.08, 2.66) | -1.04 | 0.301 |
| BAIT | 5.96 | 5.13 | (-3.93, 16.35) | 1.16 | 0.245 |
| AI Knowledge | 0.20 | 0.78 | (-1.32, 1.75) | 0.26 | 0.798 |
| BAIT × AI Knowledge | -0.68 | 1.32 | (-3.32, 1.90) | -0.52 | 0.606 |
glm(Feedback_Fun ~ BAIT * AI_Knowledge,
data=mutate(df, Feedback_Fun = ifelse(Feedback_Fun=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Fun'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -1.39 | 2.97 | (-7.27, 4.49) | -0.47 | 0.640 |
| BAIT | 4.33 | 5.23 | (-5.92, 14.73) | 0.83 | 0.407 |
| AI Knowledge | 0.13 | 0.77 | (-1.41, 1.64) | 0.16 | 0.871 |
| BAIT × AI Knowledge | -0.47 | 1.33 | (-3.10, 2.17) | -0.36 | 0.722 |
glm(Feedback_Boring ~ BAIT * AI_Knowledge,
data=mutate(df, Feedback_Boring = ifelse(Feedback_Boring=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Boring'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -4.35 | 3.69 | (-11.93, 2.63) | -1.18 | 0.239 |
| BAIT | 3.82 | 6.30 | (-8.31, 16.56) | 0.61 | 0.545 |
| AI Knowledge | 0.76 | 0.93 | (-1.01, 2.66) | 0.82 | 0.414 |
| BAIT × AI Knowledge | -0.90 | 1.57 | (-4.09, 2.12) | -0.57 | 0.567 |
write.csv(df, "../data/data_participants.csv", row.names = FALSE)
write.csv(dftask, "../data/data.csv", row.names = FALSE)
Comments
Code